InĀ [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import seaborn as sns
from sklearn.preprocessing import StandardScaler
InĀ [3]:
df = pd.read_csv(r"C:\Users\sunil\spotify dataset.csv")
df
Out[3]:
track_id track_name track_artist track_popularity track_album_id track_album_name track_album_release_date playlist_name playlist_id playlist_genre ... key loudness mode speechiness acousticness instrumentalness liveness valence tempo duration_ms
0 6f807x0ima9a1j3VPbc7VN I Don't Care (with Justin Bieber) - Loud Luxur... Ed Sheeran 66 2oCs0DGTsRO98Gh5ZSl2Cx I Don't Care (with Justin Bieber) [Loud Luxury... 2019-06-14 Pop Remix 37i9dQZF1DXcZDD7cfEKhW pop ... 6 -2.634 1 0.0583 0.102000 0.000000 0.0653 0.5180 122.036 194754
1 0r7CVbZTWZgbTCYdfa2P31 Memories - Dillon Francis Remix Maroon 5 67 63rPSO264uRjW1X5E6cWv6 Memories (Dillon Francis Remix) 2019-12-13 Pop Remix 37i9dQZF1DXcZDD7cfEKhW pop ... 11 -4.969 1 0.0373 0.072400 0.004210 0.3570 0.6930 99.972 162600
2 1z1Hg7Vb0AhHDiEmnDE79l All the Time - Don Diablo Remix Zara Larsson 70 1HoSmj2eLcsrR0vE9gThr4 All the Time (Don Diablo Remix) 2019-07-05 Pop Remix 37i9dQZF1DXcZDD7cfEKhW pop ... 1 -3.432 0 0.0742 0.079400 0.000023 0.1100 0.6130 124.008 176616
3 75FpbthrwQmzHlBJLuGdC7 Call You Mine - Keanu Silva Remix The Chainsmokers 60 1nqYsOef1yKKuGOVchbsk6 Call You Mine - The Remixes 2019-07-19 Pop Remix 37i9dQZF1DXcZDD7cfEKhW pop ... 7 -3.778 1 0.1020 0.028700 0.000009 0.2040 0.2770 121.956 169093
4 1e8PAfcKUYoKkxPhrHqw4x Someone You Loved - Future Humans Remix Lewis Capaldi 69 7m7vv9wlQ4i0LFuJiE2zsQ Someone You Loved (Future Humans Remix) 2019-03-05 Pop Remix 37i9dQZF1DXcZDD7cfEKhW pop ... 1 -4.672 1 0.0359 0.080300 0.000000 0.0833 0.7250 123.976 189052
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
32828 7bxnKAamR3snQ1VGLuVfC1 City Of Lights - Official Radio Edit Lush & Simon 42 2azRoBBWEEEYhqV6sb7JrT City Of Lights (Vocal Mix) 2014-04-28 ♄ EDM LOVE 2020 6jI1gFr6ANFtT8MmTvA2Ux edm ... 2 -1.814 1 0.0936 0.076600 0.000000 0.0668 0.2100 128.170 204375
32829 5Aevni09Em4575077nkWHz Closer - Sultan & Ned Shepard Remix Tegan and Sara 20 6kD6KLxj7s8eCE3ABvAyf5 Closer Remixed 2013-03-08 ♄ EDM LOVE 2020 6jI1gFr6ANFtT8MmTvA2Ux edm ... 0 -4.462 1 0.0420 0.001710 0.004270 0.3750 0.4000 128.041 353120
32830 7ImMqPP3Q1yfUHvsdn7wEo Sweet Surrender - Radio Edit Starkillers 14 0ltWNSY9JgxoIZO4VzuCa6 Sweet Surrender (Radio Edit) 2014-04-21 ♄ EDM LOVE 2020 6jI1gFr6ANFtT8MmTvA2Ux edm ... 6 -4.899 0 0.0481 0.108000 0.000001 0.1500 0.4360 127.989 210112
32831 2m69mhnfQ1Oq6lGtXuYhgX Only For You - Maor Levi Remix Mat Zo 15 1fGrOkHnHJcStl14zNx8Jy Only For You (Remixes) 2014-01-01 ♄ EDM LOVE 2020 6jI1gFr6ANFtT8MmTvA2Ux edm ... 2 -3.361 1 0.1090 0.007920 0.127000 0.3430 0.3080 128.008 367432
32832 29zWqhca3zt5NsckZqDf6c Typhoon - Original Mix Julian Calor 27 0X3mUOm6MhxR7PzxG95rAo Typhoon/Storm 2014-03-03 ♄ EDM LOVE 2020 6jI1gFr6ANFtT8MmTvA2Ux edm ... 5 -4.571 0 0.0385 0.000133 0.341000 0.7420 0.0894 127.984 337500

32833 rows Ɨ 23 columns

InĀ [4]:
df.describe()
Out[4]:
track_popularity danceability energy key loudness mode speechiness acousticness instrumentalness liveness valence tempo duration_ms
count 32833.000000 32833.000000 32833.000000 32833.000000 32833.000000 32833.000000 32833.000000 32833.000000 32833.000000 32833.000000 32833.000000 32833.000000 32833.000000
mean 42.477081 0.654850 0.698619 5.374471 -6.719499 0.565711 0.107068 0.175334 0.084747 0.190176 0.510561 120.881132 225799.811622
std 24.984074 0.145085 0.180910 3.611657 2.988436 0.495671 0.101314 0.219633 0.224230 0.154317 0.233146 26.903624 59834.006182
min 0.000000 0.000000 0.000175 0.000000 -46.448000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 4000.000000
25% 24.000000 0.563000 0.581000 2.000000 -8.171000 0.000000 0.041000 0.015100 0.000000 0.092700 0.331000 99.960000 187819.000000
50% 45.000000 0.672000 0.721000 6.000000 -6.166000 1.000000 0.062500 0.080400 0.000016 0.127000 0.512000 121.984000 216000.000000
75% 62.000000 0.761000 0.840000 9.000000 -4.645000 1.000000 0.132000 0.255000 0.004830 0.248000 0.693000 133.918000 253585.000000
max 100.000000 0.983000 1.000000 11.000000 1.275000 1.000000 0.918000 0.994000 0.994000 0.996000 0.991000 239.440000 517810.000000
InĀ [5]:
#checking for null values 
df.isnull().sum()
Out[5]:
track_id                    0
track_name                  5
track_artist                5
track_popularity            0
track_album_id              0
track_album_name            5
track_album_release_date    0
playlist_name               0
playlist_id                 0
playlist_genre              0
playlist_subgenre           0
danceability                0
energy                      0
key                         0
loudness                    0
mode                        0
speechiness                 0
acousticness                0
instrumentalness            0
liveness                    0
valence                     0
tempo                       0
duration_ms                 0
dtype: int64
InĀ [6]:
#checking for duplicated values (rows)
duplicate_count = df.duplicated().sum()
print(f"Number of duplicate rows: {duplicate_count}")
Number of duplicate rows: 0
InĀ [7]:
#Dropping the null values
df.dropna(axis=0, inplace= True)
df.isnull().sum()
Out[7]:
track_id                    0
track_name                  0
track_artist                0
track_popularity            0
track_album_id              0
track_album_name            0
track_album_release_date    0
playlist_name               0
playlist_id                 0
playlist_genre              0
playlist_subgenre           0
danceability                0
energy                      0
key                         0
loudness                    0
mode                        0
speechiness                 0
acousticness                0
instrumentalness            0
liveness                    0
valence                     0
tempo                       0
duration_ms                 0
dtype: int64
InĀ [8]:
#Histogram of track_popularity
plt.figure(figsize = (10,6))
sns.histplot(df["track_popularity"], bins = 30, kde= True)
plt.show()
C:\Users\sunil\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
No description has been provided for this image
InĀ [9]:
#BarGraph of Track Popularity vs Speechiness
plt.bar(df["track_popularity"], df["speechiness"])
plt.title("Track Popularity vs Speechiness")
plt.show()
No description has been provided for this image
InĀ [10]:
#Pairplot of 'danceability', 'energy', 'valence', 'tempo'
sns.pairplot(df[['danceability', 'energy', 'valence', 'tempo']])
plt.show()
C:\Users\sunil\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\sunil\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\sunil\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\sunil\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
No description has been provided for this image
InĀ [11]:
#Pairplot of playlist_genre
sns.pairplot(df, hue="playlist_genre")
C:\Users\sunil\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\sunil\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\sunil\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\sunil\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\sunil\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\sunil\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\sunil\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\sunil\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\sunil\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\sunil\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\sunil\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\sunil\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\sunil\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
Out[11]:
<seaborn.axisgrid.PairGrid at 0x1872fffbb10>
No description has been provided for this image
InĀ [12]:
# BarGraph of Track Popularity vs Duration_ms
plt.bar(df["track_popularity"], df["duration_ms"])
plt.title("Track Popularity vs Duration_ms")
plt.show()
No description has been provided for this image
InĀ [13]:
#BarGraph of Tempo vs Duration_ms
plt.bar(df["tempo"], df["duration_ms"])
plt.title("Tempo vs Duration_ms")
plt.show()
No description has been provided for this image
InĀ [14]:
#BarGraph/Figure of Distribution of Playlist Genres
plt.figure(figsize=(10, 6))
df['playlist_genre_string'] = pd.to_numeric(df['playlist_genre'], errors='coerce')
sns.countplot(df['playlist_genre_string'])
plt.title('Distribution of Playlist Genres')
plt.show()
No description has been provided for this image
InĀ [15]:
#BarGraph/Figure of Distribution of Playlist Names
plt.figure(figsize=(10, 6))
df['playlist_name_string'] = pd.to_numeric(df['playlist_name'], errors='coerce' )
sns.countplot(df['playlist_name_string'])
plt.title('Distribution of Playlist Names')
plt.xticks(rotation=90)
plt.show()
No description has been provided for this image
InĀ [16]:
#Show your correlation matrix of features according to the datasets.
corr_matrix = df[['track_popularity', 'danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 
                  'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms']].corr()
corr_matrix
Out[16]:
track_popularity danceability energy loudness speechiness acousticness instrumentalness liveness valence tempo duration_ms
track_popularity 1.000000 0.064754 -0.108984 0.057717 0.007067 0.085042 -0.150003 -0.054593 0.033278 -0.005538 -0.143634
danceability 0.064754 1.000000 -0.086074 0.025351 0.181808 -0.024515 -0.008658 -0.123899 0.330538 -0.184132 -0.096922
energy -0.108984 -0.086074 1.000000 0.676662 -0.032184 -0.539732 0.033282 0.161317 0.151050 0.150072 0.012560
loudness 0.057717 0.025351 0.676662 1.000000 0.010313 -0.361646 -0.147823 0.077589 0.053411 0.093761 -0.115039
speechiness 0.007067 0.181808 -0.032184 0.010313 1.000000 0.026168 -0.103385 0.055337 0.064756 0.044649 -0.089432
acousticness 0.085042 -0.024515 -0.539732 -0.361646 0.026168 1.000000 -0.006881 -0.077247 -0.016833 -0.112782 -0.081553
instrumentalness -0.150003 -0.008658 0.033282 -0.147823 -0.103385 -0.006881 1.000000 -0.005505 -0.175406 0.023303 0.063256
liveness -0.054593 -0.123899 0.161317 0.077589 0.055337 -0.077247 -0.005505 1.000000 -0.020432 0.020887 0.006197
valence 0.033278 0.330538 0.151050 0.053411 0.064756 -0.016833 -0.175406 -0.020432 1.000000 -0.025639 -0.032292
tempo -0.005538 -0.184132 0.150072 0.093761 0.044649 -0.112782 0.023303 0.020887 -0.025639 1.000000 -0.001347
duration_ms -0.143634 -0.096922 0.012560 -0.115039 -0.089432 -0.081553 0.063256 0.006197 -0.032292 -0.001347 1.000000
InĀ [17]:
#Showing heatmap of correlation
plt.figure(figsize=(12, 10))
sns.heatmap(corr_matrix, annot=True)
plt.title('Correlation Matrix of Features')
plt.show()
No description has been provided for this image
InĀ [18]:
from sklearn.cluster import KMeans
km=KMeans(n_clusters=5)
InĀ [19]:
Energy_Vs_Loudness = plt.scatter(df["energy"], df["loudness"])
plt.title("Energy VS Loudness")
plt.xlabel('Loudness')
plt.ylabel('Energy')
plt.show()
No description has been provided for this image
InĀ [20]:
#Find out and plot different clusters according to different parameters like playlist genres , playlist names.
km.fit(df[["energy","loudness"]])
C:\Users\sunil\anaconda3\Lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  warnings.warn(
Out[20]:
KMeans(n_clusters=5)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
KMeans(n_clusters=5)
InĀ [21]:
df["Energy_Vs_Loudness"]=km.labels_
df
Out[21]:
track_id track_name track_artist track_popularity track_album_id track_album_name track_album_release_date playlist_name playlist_id playlist_genre ... speechiness acousticness instrumentalness liveness valence tempo duration_ms playlist_genre_string playlist_name_string Energy_Vs_Loudness
0 6f807x0ima9a1j3VPbc7VN I Don't Care (with Justin Bieber) - Loud Luxur... Ed Sheeran 66 2oCs0DGTsRO98Gh5ZSl2Cx I Don't Care (with Justin Bieber) [Loud Luxury... 2019-06-14 Pop Remix 37i9dQZF1DXcZDD7cfEKhW pop ... 0.0583 0.102000 0.000000 0.0653 0.5180 122.036 194754 NaN NaN 1
1 0r7CVbZTWZgbTCYdfa2P31 Memories - Dillon Francis Remix Maroon 5 67 63rPSO264uRjW1X5E6cWv6 Memories (Dillon Francis Remix) 2019-12-13 Pop Remix 37i9dQZF1DXcZDD7cfEKhW pop ... 0.0373 0.072400 0.004210 0.3570 0.6930 99.972 162600 NaN NaN 4
2 1z1Hg7Vb0AhHDiEmnDE79l All the Time - Don Diablo Remix Zara Larsson 70 1HoSmj2eLcsrR0vE9gThr4 All the Time (Don Diablo Remix) 2019-07-05 Pop Remix 37i9dQZF1DXcZDD7cfEKhW pop ... 0.0742 0.079400 0.000023 0.1100 0.6130 124.008 176616 NaN NaN 1
3 75FpbthrwQmzHlBJLuGdC7 Call You Mine - Keanu Silva Remix The Chainsmokers 60 1nqYsOef1yKKuGOVchbsk6 Call You Mine - The Remixes 2019-07-19 Pop Remix 37i9dQZF1DXcZDD7cfEKhW pop ... 0.1020 0.028700 0.000009 0.2040 0.2770 121.956 169093 NaN NaN 1
4 1e8PAfcKUYoKkxPhrHqw4x Someone You Loved - Future Humans Remix Lewis Capaldi 69 7m7vv9wlQ4i0LFuJiE2zsQ Someone You Loved (Future Humans Remix) 2019-03-05 Pop Remix 37i9dQZF1DXcZDD7cfEKhW pop ... 0.0359 0.080300 0.000000 0.0833 0.7250 123.976 189052 NaN NaN 4
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
32828 7bxnKAamR3snQ1VGLuVfC1 City Of Lights - Official Radio Edit Lush & Simon 42 2azRoBBWEEEYhqV6sb7JrT City Of Lights (Vocal Mix) 2014-04-28 ♄ EDM LOVE 2020 6jI1gFr6ANFtT8MmTvA2Ux edm ... 0.0936 0.076600 0.000000 0.0668 0.2100 128.170 204375 NaN NaN 1
32829 5Aevni09Em4575077nkWHz Closer - Sultan & Ned Shepard Remix Tegan and Sara 20 6kD6KLxj7s8eCE3ABvAyf5 Closer Remixed 2013-03-08 ♄ EDM LOVE 2020 6jI1gFr6ANFtT8MmTvA2Ux edm ... 0.0420 0.001710 0.004270 0.3750 0.4000 128.041 353120 NaN NaN 1
32830 7ImMqPP3Q1yfUHvsdn7wEo Sweet Surrender - Radio Edit Starkillers 14 0ltWNSY9JgxoIZO4VzuCa6 Sweet Surrender (Radio Edit) 2014-04-21 ♄ EDM LOVE 2020 6jI1gFr6ANFtT8MmTvA2Ux edm ... 0.0481 0.108000 0.000001 0.1500 0.4360 127.989 210112 NaN NaN 4
32831 2m69mhnfQ1Oq6lGtXuYhgX Only For You - Maor Levi Remix Mat Zo 15 1fGrOkHnHJcStl14zNx8Jy Only For You (Remixes) 2014-01-01 ♄ EDM LOVE 2020 6jI1gFr6ANFtT8MmTvA2Ux edm ... 0.1090 0.007920 0.127000 0.3430 0.3080 128.008 367432 NaN NaN 1
32832 29zWqhca3zt5NsckZqDf6c Typhoon - Original Mix Julian Calor 27 0X3mUOm6MhxR7PzxG95rAo Typhoon/Storm 2014-03-03 ♄ EDM LOVE 2020 6jI1gFr6ANFtT8MmTvA2Ux edm ... 0.0385 0.000133 0.341000 0.7420 0.0894 127.984 337500 NaN NaN 1

32828 rows Ɨ 26 columns

InĀ [23]:
sns.scatterplot(x="energy",y="loudness",hue="Energy_Vs_Loudness",data=df)
Out[23]:
<Axes: xlabel='energy', ylabel='loudness'>
No description has been provided for this image
InĀ [24]:
Danceability_Vs_Valence = plt.scatter(df["danceability"], df["valence"])
plt.title("Danceability Vs Valence")
plt.xlabel('Danceability')
plt.ylabel('Valence')
plt.show()
No description has been provided for this image
InĀ [25]:
km.fit(df[["danceability","valence"]])
C:\Users\sunil\anaconda3\Lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  warnings.warn(
Out[25]:
KMeans(n_clusters=5)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
KMeans(n_clusters=5)
InĀ [26]:
df["Danceability_Vs_Valence"]=km.labels_
df
Out[26]:
track_id track_name track_artist track_popularity track_album_id track_album_name track_album_release_date playlist_name playlist_id playlist_genre ... acousticness instrumentalness liveness valence tempo duration_ms playlist_genre_string playlist_name_string Energy_Vs_Loudness Danceability_Vs_Valence
0 6f807x0ima9a1j3VPbc7VN I Don't Care (with Justin Bieber) - Loud Luxur... Ed Sheeran 66 2oCs0DGTsRO98Gh5ZSl2Cx I Don't Care (with Justin Bieber) [Loud Luxury... 2019-06-14 Pop Remix 37i9dQZF1DXcZDD7cfEKhW pop ... 0.102000 0.000000 0.0653 0.5180 122.036 194754 NaN NaN 1 0
1 0r7CVbZTWZgbTCYdfa2P31 Memories - Dillon Francis Remix Maroon 5 67 63rPSO264uRjW1X5E6cWv6 Memories (Dillon Francis Remix) 2019-12-13 Pop Remix 37i9dQZF1DXcZDD7cfEKhW pop ... 0.072400 0.004210 0.3570 0.6930 99.972 162600 NaN NaN 4 2
2 1z1Hg7Vb0AhHDiEmnDE79l All the Time - Don Diablo Remix Zara Larsson 70 1HoSmj2eLcsrR0vE9gThr4 All the Time (Don Diablo Remix) 2019-07-05 Pop Remix 37i9dQZF1DXcZDD7cfEKhW pop ... 0.079400 0.000023 0.1100 0.6130 124.008 176616 NaN NaN 1 0
3 75FpbthrwQmzHlBJLuGdC7 Call You Mine - Keanu Silva Remix The Chainsmokers 60 1nqYsOef1yKKuGOVchbsk6 Call You Mine - The Remixes 2019-07-19 Pop Remix 37i9dQZF1DXcZDD7cfEKhW pop ... 0.028700 0.000009 0.2040 0.2770 121.956 169093 NaN NaN 1 1
4 1e8PAfcKUYoKkxPhrHqw4x Someone You Loved - Future Humans Remix Lewis Capaldi 69 7m7vv9wlQ4i0LFuJiE2zsQ Someone You Loved (Future Humans Remix) 2019-03-05 Pop Remix 37i9dQZF1DXcZDD7cfEKhW pop ... 0.080300 0.000000 0.0833 0.7250 123.976 189052 NaN NaN 4 2
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
32828 7bxnKAamR3snQ1VGLuVfC1 City Of Lights - Official Radio Edit Lush & Simon 42 2azRoBBWEEEYhqV6sb7JrT City Of Lights (Vocal Mix) 2014-04-28 ♄ EDM LOVE 2020 6jI1gFr6ANFtT8MmTvA2Ux edm ... 0.076600 0.000000 0.0668 0.2100 128.170 204375 NaN NaN 1 4
32829 5Aevni09Em4575077nkWHz Closer - Sultan & Ned Shepard Remix Tegan and Sara 20 6kD6KLxj7s8eCE3ABvAyf5 Closer Remixed 2013-03-08 ♄ EDM LOVE 2020 6jI1gFr6ANFtT8MmTvA2Ux edm ... 0.001710 0.004270 0.3750 0.4000 128.041 353120 NaN NaN 1 3
32830 7ImMqPP3Q1yfUHvsdn7wEo Sweet Surrender - Radio Edit Starkillers 14 0ltWNSY9JgxoIZO4VzuCa6 Sweet Surrender (Radio Edit) 2014-04-21 ♄ EDM LOVE 2020 6jI1gFr6ANFtT8MmTvA2Ux edm ... 0.108000 0.000001 0.1500 0.4360 127.989 210112 NaN NaN 4 3
32831 2m69mhnfQ1Oq6lGtXuYhgX Only For You - Maor Levi Remix Mat Zo 15 1fGrOkHnHJcStl14zNx8Jy Only For You (Remixes) 2014-01-01 ♄ EDM LOVE 2020 6jI1gFr6ANFtT8MmTvA2Ux edm ... 0.007920 0.127000 0.3430 0.3080 128.008 367432 NaN NaN 1 1
32832 29zWqhca3zt5NsckZqDf6c Typhoon - Original Mix Julian Calor 27 0X3mUOm6MhxR7PzxG95rAo Typhoon/Storm 2014-03-03 ♄ EDM LOVE 2020 6jI1gFr6ANFtT8MmTvA2Ux edm ... 0.000133 0.341000 0.7420 0.0894 127.984 337500 NaN NaN 1 4

32828 rows Ɨ 27 columns

InĀ [27]:
sns.scatterplot(x="danceability",y="valence",hue="Danceability_Vs_Valence",data=df)
Out[27]:
<Axes: xlabel='danceability', ylabel='valence'>
No description has been provided for this image
InĀ [28]:
#Build your model and show your final result so that the recommendation system can be built based on that.
# Features selected for clustering
df = pd.DataFrame(df)
features = ['danceability', 'energy', 'loudness', 'acousticness', 'instrumentalness', 'valence', 'tempo']
X = df[features]

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply K-means clustering
kmeans = KMeans(n_clusters=5, random_state=42)  # Adjust the number of clusters as needed
kmeans.fit(X_scaled)

# Assign cluster labels to the original data
df['cluster'] = kmeans.labels_

# Recommendation function
def recommend_songs(song_id, df, num_recommendations=5):
    if song_id not in df['track_id'].values:
        print("Song ID not found in the dataset.")
        return None
    
    # Find the cluster of the input song
    song_cluster = df[df['track_id'] == song_id]['cluster'].values[0]

    # Filter songs from the same cluster
    recommended_songs = df[df['cluster'] == song_cluster]

    # Remove the input song from the recommendations
    recommended_songs = recommended_songs[recommended_songs['track_id'] != song_id]

    # Randomly select songs from the same cluster
    recommendations = recommended_songs.sample(n=num_recommendations)
    return recommendations[['track_id', 'track_name', 'track_artist', 'cluster']]

# Example usage
song_id_example = df['track_id'].iloc[0]
recommendations = recommend_songs(song_id_example, df, num_recommendations=5)
print(recommendations)
C:\Users\sunil\anaconda3\Lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  warnings.warn(
                     track_id  \
19090  6K7V47wFpfam2aeE893rbL   
9530   5FvlQZTm4eOGeUiAB1bKc9   
32332  3SyV3b5lqDmXpuhY9Sk5Na   
78     3vQhEd4lOg4mAV6CHCgoHW   
16472  0XeSTQIu2LqnAKdOigQUtT   

                                              track_name  track_artist  \
19090                                      Suave - Remix  Jey Blessing   
9530                                         No Vaseline      Ice Cube   
32332                             The End (Original Mix)   Tommy Trash   
78     South of the Border (feat. Camila Cabello & Ca...    Ed Sheeran   
16472                   Clandestino (feat. Calypso Rose)     Manu Chao   

       cluster  
19090        0  
9530         0  
32332        0  
78           0  
16472        0